#!/usr/bin/env python
# -*- coding: utf8 -*-

"""
#全国企业信用信息公示系统（江西）
#维护黄羽
"""

import re
import requests
import traceback
import time
import copy
from bs4 import BeautifulSoup

from utils import kill_captcha
from scpy.logger import get_logger
import sd_template_dict as TE
import table

logger = get_logger(__file__)


def extract_share_holder_detail(detail_res):
    """Parse one shareholder-detail HTML fragment into a normalized dict.

    :param detail_res: HTML fragment (anything str()-able) containing <td>
        cells; cell 0 is the shareholder name, cell 1 the subscribed amount
        and cell 5 the contribution date, as observed on the site's pages.
    :return: dict with the fixed shareholder-detail keys; any field that
        cannot be extracted is left as an empty string instead of raising.
    """
    detail_td = re.findall("<td.*?>(.*?)</td>", str(detail_res), re.S)

    # Guard every cell access: a malformed or truncated page must not crash
    # the crawler (the original indexed detail_td[0] unconditionally).
    name = detail_td[0] if detail_td else ""
    if len(detail_td) > 1:
        try:
            sub_conam = float(detail_td[1])
        except ValueError:
            # Non-numeric amount text (blank cell, annotated value, ...).
            sub_conam = ""
    else:
        sub_conam = ""
    con_date = table.parse_time(detail_td[5]) if len(detail_td) > 6 else ""

    return {"shareholderType": "", "shareholderName": name, "regCapCur": "",
            "country": "", "fundedRatio": "", "subConam": sub_conam,
            "conDate": con_date}


UserAgent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36"


def download_captcha_kill(companyName):
    """
    Download and crack the search captcha, then search for the company and
    return the detail-page URL used to build subsequent requests.

    :param companyName: company name or registration number to search for.
    :return:
        * the company detail-page URL (str) when the captcha is accepted and
          the company exists;
        * None when the company does not exist;
        * '' when the cracked captcha is rejected (caller should retry).
    :raises ValueError: when companyName is empty.
    :raises Exception: network or cracking-service failures propagate.
    """
    if not companyName:
        raise ValueError("input error!")

    index_url = "http://gsxt.jxaic.gov.cn/ECPS/"
    req = requests.session()
    img_url = r'http://gsxt.jxaic.gov.cn/ECPS/common/common_getJjYzmImg.pt?yzmName=searchYzm&imgWidth=180&t=0.5804444200981667'
    req.headers = {
        'Connection': 'keep-alive',
        'Host': 'gsxt.jxaic.gov.cn',
        'User-Agent': UserAgent,
    }
    # The index request only primes the session cookies; its body is unused.
    req.get(index_url)
    img_res = req.get(img_url, timeout=100).content
    if not img_res:  # empty body means the captcha image failed to download
        logger.info('下载的验证码为空,:%s' % img_res)
        return ''
    # Crack the captcha image via the external recognition service.
    res_code = kill_captcha(img_res, "jx", "jpg")
    if not res_code or len(res_code) > 100 or str(res_code) in ['None', 'wrong']:
        # len(res_code) > 100 means the service returned a 500 error page
        # instead of a short code; '' tells the caller to retry.
        logger.error('破解验证码服务器返回的验证码格式错误')
        logger.error('验证码为:%s' % res_code)
        return ''

    # Submit the recognized captcha digits to the site for validation.
    check_dict_1 = {'yzm': res_code, 'searchtext': companyName}
    check_url_1 = 'http://gsxt.jxaic.gov.cn/ECPS/home/home_homeSearchYzm.pt'
    res_message = req.post(check_url_1, data=check_dict_1, timeout=100).content
    if res_message != '{"msg":"true","success":true}':
        logger.info('网站返回验证码消息为:%s' % res_message)
        return ''
    check_url_2 = 'http://gsxt.jxaic.gov.cn/ECPS/home/home_homeSearch.pt'
    check_params_2 = {'yzm': res_code, 'search': companyName}
    check_res = req.get(check_url_2, params=check_params_2).content

    if '无数据...' in check_res:
        logger.info("验证码错误或网页错误")
        return ''

    com_list = re.findall(r'(http://gsxt\.jxaic\.gov\.cn:80/ECPS/ccjcgs/ccjcgs_ccjcgsIndexDetail\.pt.*?)"', check_res)
    if not com_list:
        logger.info("搜索的公司不存在！输入的关键字为:%s" % companyName)
        return None

    return com_list[0]


def get_company_info(com_info):
    """
    Download every raw HTML page for one company: base registration info,
    shareholders, changes, filings, spot checks, abnormal operations and
    the annual reports.

    :param com_info: detail-page URL returned by download_captcha_kill();
        its query string must carry qylx, qyid, zch and tabName.
    :return: raw_dict with the raw pages under 'html' and the per-year
        annual-report HTML under 'yearList'.
    :raises Exception: when com_info is empty or lacks the expected
        query-string parameters.
    """
    if not com_info:
        raise Exception("com_list 错误")
    raw_dict = {
        "province": "jx",
        "type": "1",
        "html": "",
        "yearList": [],
        "keyword": "",
        "companyName": "",
        "json": "",
    }
    raw_html = {}

    qylx = re.findall('qylx=(.*?)&', com_info)
    qyid = re.findall('qyid=(.*?)&', com_info)
    zch = re.findall('zch=(.*?)&', com_info)
    tab_name = re.findall('tabName=(.*)', com_info)
    if not qylx or not qyid or not zch or not tab_name:
        raise Exception("com_list 错误")
    qylx = qylx[0]
    qyid = qyid[0]
    zch = zch[0]
    # tab_name is extracted only to validate the URL shape; the later
    # requests do not use it.
    tab_name = tab_name[0]

    root_url = 'http://gsxt.jxaic.gov.cn'
    req = requests.session()
    req.headers = {
        'Connection': 'keep-alive',
        'Host': 'gsxt.jxaic.gov.cn',
        'User-Agent': UserAgent,
    }
    index_res = req.get(com_info).content
    raw_html['index'] = index_res

    # Base registration info
    base_url = root_url + '/ECPS/ccjcgs/gsgs_viewDjxx.pt'
    base_res = req.get(base_url,
                       params={'qyid': qyid, 'zch': zch, 'qylx': qylx, 'num': 'undefined', 'showgdxx': 'true'}).content
    raw_html['base'] = base_res

    # Shareholder info
    share_url = root_url + '/ECPS/ccjcgs/gsgs_viewDjxxGdxx.pt'
    share_res = req.post(share_url, params={'qyid': qyid}, data={'page': 1, 'limit': 100, 'mark': 0}).content
    raw_html['share'] = share_res

    # Shareholder details
    # todo
    # xh_s = re.findall('''tzrczxx\(\'(.*?)\'\)''', share_res, re.S)
    # for a_xh in xh_s:
    #     item_url = 'http://gsxt.jxaic.gov.cn/ECPS/tzrczxxAction_tzrczxxxx.action?xh='+xh[0]+'&qylx='+qylx+'&nbxh='+nbxh

    # Change records
    alter_url = root_url + '/ECPS/ccjcgs/gsgs_viewDjxxBgxx.pt'
    alter_res = req.post(alter_url, params={'qyid': qyid}, data={'page': 1, 'limit': 100, 'mark': 0}).content
    raw_html['alter'] = alter_res

    time.sleep(2)  # throttle between pages to avoid being blocked
    # Filing info
    ba_url = root_url + '/ECPS/ccjcgs/gsgs_viewBaxx.pt'
    ba_res = req.get(ba_url, params={'qyid': qyid, 'zch': zch, 'qylx': qylx, 'showgdxx': 'true'}).content
    raw_html['beian'] = ba_res

    time.sleep(2)
    # Spot-check / inspection info
    check_url = root_url + '/ECPS/ccjcgs/gsgs_viewCcjcxx.pt'
    check_res = req.get(check_url, params={'qyid': qyid, 'zch': zch, 'qylx': qylx, 'showgdxx': 'true'}).content
    raw_html['check'] = check_res
    time.sleep(2)
    # Abnormal-operation info (fixed: URL and response were previously
    # stored in the same variable, shadowing the URL)
    abnormal_url = root_url + '/ECPS/ccjcgs/gsgs_viewJyycxx.pt'
    abnormal_res = req.get(abnormal_url, params={'qyid': qyid, 'zch': zch, 'qylx': qylx, 'showgdxx': 'true'}).content
    raw_html['abnormal'] = abnormal_res
    time.sleep(2)
    # Annual reports: index page first, then one page per year
    year_index_url = root_url + '/ECPS/ccjcgs/qygs_ViewQynb.pt'
    year_index_res = req.get(year_index_url,
                             params={'qyid': qyid, 'zch': zch, 'qylx': qylx, 'num': 0, 'showgdxx': 'true'}).content
    raw_html['year_index'] = year_index_res
    raw_dict['html'] = raw_html

    year_url_list = re.findall('(/ECPS/ccjcgs/.*?)"', year_index_res)
    raw_year_list = []
    for a_url in year_url_list:
        time.sleep(2)
        year = re.findall('nbnd=(.*?)&', a_url)
        year = year[0] if year else ""
        year_res = req.get(root_url + a_url).content
        raw_year_list.append({'year': year, 'base': year_res})

    raw_dict['yearList'] = raw_year_list

    return raw_dict


def extract_base_info(raw_dict):
    """
    Build the structured base-information dict from the raw HTML pages
    collected by get_company_info().

    :param raw_dict: dict whose 'html' key maps page names to raw HTML.
    :return: a deep copy of TE.void_base_dict filled with extracted lists.
    :raises Exception: when raw_dict or its 'html' entry is empty.
    """
    if not raw_dict:
        raise Exception("raw_dict 错误")

    pages = raw_dict.get("html", {})
    if not pages:
        raise Exception("raw_dict 错误")

    result = copy.deepcopy(TE.void_base_dict)
    result['province'] = 'jx'

    # Basic registration info
    base_tbl = table.table_clean(pages.get("base", ""), "基本信息")
    result['basicList'] = table.index("基本信息", base_tbl) if base_tbl else []

    # Shareholders: the section title differs between page variants, so
    # both cleanings are concatenated before indexing.
    share_html = pages.get("share", "")
    share_tbl = table.table_clean(share_html, "股东信息") + table.table_clean(share_html, "股东（发起人）信息")
    holders = table.index("股东信息", share_tbl) if share_tbl else []
    for holder in holders:
        holder.pop("shareHolderdetail", None)
        # Detail fields are not available from the list page; blank them.
        for field in ('country', 'subConam', 'regCapCur', 'conDate', 'fundedRatio'):
            holder[field] = ''
    result['shareHolderList'] = holders
    # Detailed shareholder info
    # todo

    beian_html = pages.get("beian", "")
    # Key personnel
    person_tbl = table.table_clean(beian_html, "主要人员信息")
    result['personList'] = table.index("主要人员信息", person_tbl) if person_tbl else []

    # Branch offices
    branch_tbl = table.table_clean(beian_html, "分支机构信息")
    result['filiationList'] = table.index("分支机构信息", branch_tbl) if branch_tbl else []

    # Liquidation
    liq_tbl = table.table_clean(beian_html, "清算信息")
    result['liquidationList'] = table.index("清算信息", liq_tbl) if liq_tbl else []

    # Abnormal operations
    abn_tbl = table.table_clean(pages.get("abnormal", ""), "经营异常信息")
    result['abnormalOperation'] = table.index("经营异常信息", abn_tbl) if abn_tbl else []

    # Spot checks
    chk_tbl = table.table_clean(pages.get("check", ""), "抽查检查信息")
    result['checkMessage'] = table.index("抽查检查信息", chk_tbl) if chk_tbl else []

    return result


def extract_year_info(raw_dict):
    """
    Extract the annual-report list from raw_dict['yearList'].

    :param raw_dict: dict produced by get_company_info().
    :return: None when raw_dict is empty, [] when there are no annual
        reports, otherwise a list of filled TE.void_year_dict copies.
    """
    if not raw_dict:
        return None
    raw_years = raw_dict.get("yearList", [])
    if not raw_years:
        return []

    # (result key, section title used for cleaning AND indexing, empty value)
    plain_sections = (
        ("website", "网站或网店信息", {}),
        ("entinvItemList", "对外投资信息", []),
        ("assetsInfo", "企业资产状况信息", {}),
        ("equityChangeInformations", "股权变更信息", []),
        ("changeRecords", "修改记录", []),
    )

    reports = []
    for raw_item in raw_years:
        report = copy.deepcopy(TE.void_year_dict)
        report["year"] = raw_item.get("year", "")
        html = raw_item.get("base", "")

        # Base info: newer pages title it "企业基本信息", older ones "基本信息".
        base_tbl = table.table_clean(html, "企业基本信息") or table.table_clean(html, "基本信息")
        report['baseInfo'] = table.report_index("企业基本信息", base_tbl) if base_tbl else {}

        # Shareholder contributions: cleaned under one title but indexed
        # under another, so it cannot go through the generic loop below.
        share_tbl = table.table_clean(html, "股东（发起人）及出资信息")
        report["investorInformations"] = table.report_index("股东及出资信息", share_tbl) if share_tbl else []

        for key, title, empty in plain_sections:
            tbl = table.table_clean(html, title)
            report[key] = table.report_index(title, tbl) if tbl else empty

        reports.append(report)

    return reports


def search2(companyName, MAXTIME=40):
    res = ''
    asic_dict = {}
    # MAXTIME = 20
    a_time = MAXTIME
    while a_time > 0:
        # print res, '*'*20
        if res is None:  # 公司不存在
            return None
        elif res == '':  # 验证码错误
            if a_time < MAXTIME:
                logger.error("重复破解验证码!当前设定重复破解次数为:%s, 剩余次数为:%s " % (MAXTIME, a_time))
            a_time -= 1
            try:
                # time.sleep(10)
                res = download_captcha_kill(companyName)
                # print res
            except Exception, e:
                traceback.print_exc(e)
                raise e
        else:
            break
    com_list = res
    res = get_company_info(com_list)
    if a_time <= 1 and res == '':
        raise Exception("多次破解验证码错误,当前设置次数为：%s" % MAXTIME)
    else:
        raw_dict = res
        try:
            asic_dict = extract_base_info(raw_dict)
            year_list = extract_year_info(raw_dict)
            res['companyName'] = asic_dict['basicList'][0].get('enterpriseName', '')

            asic_dict['yearReportList'] = year_list
            gate_method = {
                'url': 'http://gsxt.jxaic.gov.cn',
                'method': 'post',
                'province': 'jx',
                'companyName': asic_dict['basicList'][0].get('enterpriseName', ''),
                'data': com_list,
            }

            return res, asic_dict, gate_method
        except Exception, e:
            logger.info(e)
            res['companyName'] = companyName
            gate_method = {
                'url': 'http://gsxt.jxaic.gov.cn',
                'method': 'post',
                'province': 'jx',
                'companyName': companyName,
                'data': com_list,
            }
            return res, None, gate_method


def search(companyName):
    """Convenience wrapper around search2(): return only the parsed dict.

    :param companyName: company name or registration number.
    :return: the structured asic_dict, or None when the company was not found.
    """
    full = search2(companyName)
    return full[1] if full else None


def search3(gate_method):
    if 'data' not in gate_method:
        raise Exception("gate_method error, doesn't have `data` key")
    com_list = gate_method.get('data')
    res = get_company_info(com_list)
    companyName = gate_method.get('companyName', '')

    raw_dict = res
    try:
        asic_dict = extract_base_info(raw_dict)
        year_list = extract_year_info(raw_dict)
        res['companyName'] = asic_dict['basicList'][0].get('enterpriseName', '')

        asic_dict['yearReportList'] = year_list
        gate_method = {
            'url': 'http://gsxt.jxaic.gov.cn',
            'method': 'post',
            'province': 'jx',
            'companyName': asic_dict['basicList'][0].get('enterpriseName', ''),
            'data': com_list,
        }

        return res, asic_dict, gate_method
    except Exception, e:
        logger.info(e)
        res['companyName'] = companyName
        gate_method = {
            'url': 'http://gsxt.jxaic.gov.cn',
            'method': 'post',
            'province': 'jx',
            'companyName': companyName,
            'data': com_list,
        }
        return res, None, gate_method


if __name__ == "__main__":
    # 正常公司
    # import pymongo
    # import json
    # # pymongo.MongoClient('192.168.31.121', 27017)
    # clientServer = pymongo.MongoClient('192.168.31.121',27017)
    # db = clientServer.crawler_company_name
    # collectionServer = db.companyName
    # reg_no_s = collectionServer.find({'province': 'jx'}).limit(100)
    # for reg in reg_no_s:
    #     print '#'*10
    #     print reg
    #     print '#'*10
    #     reg_no = reg['regNo']
    #     try:
    #         res = search(reg_no)
    #         print json.dumps(res, indent=4, ensure_ascii=False)
    #     except Exception, e:
    #         import traceback
    #         traceback.print_exc()
    #         # print reg
    #         import pdb
    #         pdb.set_trace()

    # companyName = u'江西泓泰企业集团有限公司'
    # # 经营异常公司
    # companyName = u'吉水县天宇科技有限责任公司'
    # companyName = u'江西胜木防火建材保温制造有限公司'
    # companyName = u'景德镇景乐矿产地质勘查有限公司'
    # 抽查检查
    companyName = u'九江宝利投资咨询有限公司'

    # companyName = u'江西猪八戒'
    # companyName = u'南昌金鼎营销有限公司'


    res = search2(companyName)
    import json

    print json.dumps(res, indent=4, ensure_ascii=False)
    #
    #

    # #!/usr/bin/env python
    # import requests
    # import base64
    # auth = 'lum-customer-socialcredits-zone-gen:a98d2b7b4b0e'
    # print(requests.get('http://lumtest.com/myip.json', proxies = {'http': 'http://'+auth+'@zproxy.luminati.io:22225'},
    #     headers = {'Proxy-Authorization': 'Basic '+base64.b64encode(auth.encode('utf-8')).decode('utf-8')}).text)
    #
    #
